---
title: "The Dual Effect of Social Ties on COVID-19 spread in Japan"
subtitle: "Facebook Movement Replication Code"
date: "December 22, 2020"
author: "Timothy Fraser & Daniel P. Aldrich"

output:
  html_document:
    toc: yes
    theme: united
    df_print: paged
---


# 1. Facebook Data

## 1.1 Gather Data

The following code demonstrates how we downloaded the data from Facebook. Facebook permits us to share data derivatives, but not the original raw data. Fortunately, this study performs models on the data derivatives - prefectural tallies of neighborhood-level movement - not the original neighborhood-level data. As a result, we summarize our process for aggregating these to the prefectural label below, in code chunks that have been hashtagged out as comments, since they can't be directly replicated. We provide the prefectural dataset for analysis, however, and anyone is welcome to replicate our models on it.

```{r, eval = FALSE}

# Covid May 13 - Sept 14
#base <- "https://www.facebook.com/geoinsights-portal/downloads/vector/?id=ANONYMIZED&ds=2020-"
#"05-13+0000"


#searches <- expand_grid(
#  base,
#  month = c(5:9) %>% str_pad(2,"left","0"),
#  day = c(1:31) %>% str_pad(2,"left","0"),
#  time = c("0000", "0800", "1600")
#) %>%
#  mutate(url = paste(base, month, "-", day, "+", time, sep = "")) %>%
#  select(url) %>%
#  unlist() %>% unname() 

#searches %>% 
#  lapply(browseURL)
```


## 1.2. Aggregate to Prefecture/County level

This is the code that was used to aggregate from the neighborhood level to the prefectural level. Although we cannot share the original neighborhood level data necessary for this code, we can share the derivative prefectural level data that is producted at the end of this section. For this reason, each of these sections are set to eval = FALSE, meaning they will not run presently.

```{r}
# Compile all the many files worth of mobility data
dir("japan_fb_moves", full.names = TRUE) %>%
  lapply(read_csv) %>%
  bind_rows() %>%
  distinct() %>%
  arrange(date_time) %>%
  saveRDS("japan_fb_moves.rds")
```


```{r, eval = FALSE}
# Load the North East Asia Albers Equal Area Conic Project
eqac = "+proj=aea +lat_1=15 +lat_2=65 +lat_0=30 +lon_0=95 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"

```


```{r, message = FALSE, warning = FALSE, eval = FALSE}
# Next, Import Japan municipal shapefile 

# Gather a list of prefectures
pref = read_csv("indices_wide.csv") %>% 
  mutate(pref_code = muni_code %>% str_sub(1,2)) %>%
  select(pref_code, pref) %>%
  distinct() %>%
  filter(!is.na(pref))

# Read in our shapefile of municipal political boundaries
jp = read_sf("japan_shapefile/polbnda_jpn.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
  st_transform(
    CRS(eqac)) %>%
  # now add prefecture name
  mutate(pref_code = adm_code %>% str_sub(1,2)) %>%
  left_join(by = "pref_code", y = pref) %>%
  # Now get rid of missing adm_code
  filter(!is.na(pref)) %>%
  # Now grab just the main entries
  select(geometry, pref, pref_code, muni_code = adm_code)
```



```{r, eval = FALSE}
# Extract length of total dataset

# Give each row (edge) a unique id number
read_rds("japan_fb_moves.rds") %>%
  mutate(id = 1:n()) %>%
  saveRDS("japan_fb_moves.rds")

# Now extract the number of total rows in the dataset
n = (read_rds("japan_fb_moves.rds") %>% dim())[1]

chunks = (n/20) %>% round(0)

# If we take 20 chunks, then each chunk should start with the following case number...
nums = (1:20 - 1) * (chunks)
```

```{r, eval = FALSE, message = FALSE, warning = FALSE}
# Create a folder to receive aggregated files
dir.create("japan_fb_move_aggregator")

# Write a function to aggregate files
process = function(number){
  
  print(number)
  
  # Import a chunk of edge data
  dat <- read_rds("japan_fb_moves.rds") %>%
    slice(number:(number + chunks)) %>%
    select(
      id,
      date_time, 
      n_crisis, n_baseline,
      start_lon, start_lat,
      end_lon, end_lat)
  
  
  ##########################################
  # Assign prefecture to startpoints 
  ##########################################
  
  
  #  Second, let's identify the municipality and prefecture codes
  # for the start and end points of each edge.
  start = dat %>%
    select(id, start_lon, start_lat) %>%
    # Next, let's grab the starting coordinates of the move
    # and register these as the geography of the point
    st_as_sf(coords = c("start_lon", "start_lat")) %>%
    # Set initial projection as World Map,
    st_set_crs(4326) %>%
    # And then reformat to Albers North Asia Equal-AreaConic projection
    st_transform(CRS(eqac)) %>%
    # Last, we join the traits of the polygons from jp to the points that lie within them
    st_join(jp %>%
              select(start_pref = pref, 
                     start_pref_code = pref_code, 
                     start_muni_code = muni_code)) %>%
    as.data.frame() %>% select(-geometry)
  
  ##########################################
  # Assign prefecture to endpoints 
  ##########################################
  
  end =   dat %>%
    select(id, end_lon, end_lat) %>%
    # Next, let's grab the ending coordinates of the move
    # and register these as the geography of the point
    st_as_sf(coords = c("end_lon", "end_lat")) %>%
    # Set initial projection as World Map,
    st_set_crs(4326) %>%
    # And then reformat to Albers North Asia Equal-AreaConic projection
    st_transform(CRS(eqac)) %>%
    # Last, we join the traits of the polygons from jp to the points that lie within them
    st_join(jp %>%
              select(end_pref = pref, 
                     end_pref_code = pref_code, 
                     end_muni_code = muni_code)) %>%
    as.data.frame() %>% select(-geometry)
  # Important note: st_join() works with polygons, but not multipolygons,
  # so we must JOIN BEFORE using st_union(), which merges polygons
  
  #####################################################################
  # Join in the those codes and aggregate to prefectural level
  #####################################################################
  
  dat %>%
    # Join in the new municipal and prefectural codes for each point
    left_join(by = "id", start) %>%
    left_join(by = "id", end) %>%
    # Now aggregate the baseline & crisis movement of users for each time-stamp
    group_by(start_pref_code, end_pref_code, date_time) %>%
    summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
              n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(start_pref_code = if_else(is.na(start_pref_code), "other", start_pref_code),
           end_pref_code = if_else(is.na(end_pref_code), "other", end_pref_code)) %>%
    # Now filter out any movement from places outside Japan and places in Japan
    # We can't really accurately assess this in our current model,
    # So it's best just to focus on internal movement in Japan
    filter(start_pref_code != "other",
           end_pref_code != "other") %>%
    # Export to file
    saveRDS(paste("japan_fb_move_aggregator/", number, ".rds", sep = ""))
}

# Now run this a bunch of times, once per chunk
nums %>%
  lapply(process)

# Now bind them all together and resave
dir("japan_fb_move_aggregator", full.names = TRUE) %>%
  lapply(read_rds) %>%
  bind_rows() %>%
  # Break into dates, not 8-hour chunks
    mutate(date = cut(date_time, breaks = "days") %>% lubridate::ymd()) %>%
  # Now, because we aggregated in chunks, 
  # we need to RE-AGGREGATE one final time
  group_by(start_pref_code, end_pref_code, date) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Finally, export to file
  saveRDS("japan_fb_move_prefecture.rds")
```



```{r, eval = FALSE, message = FALSE, warning = FALSE}
# Create a complete temporal edgelist
expand_grid(start = pref$pref_code, 
            end = pref$pref_code,
            # Repeat this sequence for each day with data available
            date = seq(
              from = min(read_rds("japan_fb_move_prefecture.rds")$date, na.rm = TRUE) %>% as.Date(), 
              to = max(read_rds("japan_fb_move_prefecture.rds")$date, na.rm = TRUE) %>% as.Date(),
              by= "days")) %>%
  # Join in baseline and crisis movement values 
  # for each prefecture pair per date
  left_join(by = c("start" = "start_pref_code", 
                   "end" = "end_pref_code",
                   "date"= "date"),
            y = read_rds("japan_fb_move_prefecture.rds")) %>%
  # If entries are blank, set to zero
  mutate(n_baseline = if_else(is.na(n_baseline), 0, n_baseline),
         n_crisis = if_else(is.na(n_crisis), 0, n_crisis)) %>%
  ungroup() %>%
  # If we had any duplicate rows, remove them
  distinct() %>%
  # Now write to file
  saveRDS("japan_temporal_edgelist.rds")

```


## 1.3. Get prefecture measures of movement

```{r, message = FALSE, warning = FALSE, EVAL = FALSE}
# Now, we want to create single measures per prefecture
# for each timestamp, describing how many people are moving


#############################################
# SAME-PREFECTURE inter-neighborhood movement
#############################################

# Sheer Crisis Movement
read_rds("japan_temporal_edgelist.rds") %>% 
  # Identify only inter-neighborhood movement WITHIN THE SAME prefecture
  filter(start == end) %>%
  select(date, end, n_crisis) %>%
  # Export
  saveRDS("japan_same_prefecture_movement_crisis.rds")


# Difference in Crisis Movement Relative to Baseline
read_rds("japan_temporal_edgelist.rds") %>% 
  # Identify only inter-neighborhood movement WITHIN THE SAME prefecture
  filter(start == end) %>%
  group_by(date) %>%
  # How many MORE people are moving after the disaster?
  # How many FEWER people are moving after the disaster
  mutate(difference = n_crisis - n_baseline) %>%
  # Keep only EXCESS POSITIVE movement
  mutate(difference = if_else(difference > 0, abs(difference), 0)) %>%
  select(date, end, difference) %>%
  # Export
  saveRDS("japan_same_prefecture_movement_increase.rds")


# Difference in Crisis Movement Relative to Baseline
read_rds("japan_temporal_edgelist.rds") %>% 
  # Identify only inter-neighborhood movement WITHIN THE SAME prefecture
  filter(start == end) %>%
  group_by(date) %>%
  # How many MORE people are moving after the disaster?
  # How many FEWER people are moving after the disaster
  mutate(difference = n_crisis - n_baseline) %>%
  # Keep only EXCESS NEGATIVE movement
  mutate(difference = if_else(difference < 0, abs(difference), 0)) %>%
  select(date, end, difference) %>%
  # Export
  saveRDS("japan_same_prefecture_movement_decrease.rds")


# Calculate total ADDITIONAL people moving between neighborhoods in different prefectures TO THIS PREFECTURE,
# relative to how many people did before the crisis
read_rds("japan_temporal_edgelist.rds") %>% 
  # Identify only inter-neighborhood movement BETWEEN DIFFERENT prefectures
  filter(start != end) %>%
  # How many MORE people are moving after the disaster?
  # How many FEWER people are moving after the disaster
  mutate(difference = n_crisis - n_baseline) %>%
  # Keep only EXCESS POSITIVE movement
  mutate(difference = if_else(difference > 0, difference, 0)) %>%
  # Now summarize to just the DESTINATION prefecture (the end point of movement)
  group_by(date, end) %>%
  summarize(difference = sum(difference, na.rm = TRUE)) %>%
  select(date, end, difference) %>%
  # Export
  saveRDS("japan_between_prefecture_movement_increase.rds")


# Calculate total ADDITIONAL people moving between neighborhoods in different prefectures TO THIS PREFECTURE,
# relative to how many people did before the crisis
read_rds("japan_temporal_edgelist.rds") %>% 
  # Identify only inter-neighborhood movement BETWEEN DIFFERENT prefectures
  filter(start != end) %>%
  # How many MORE people are moving after the disaster?
  # How many FEWER people are moving after the disaster
  mutate(difference = n_crisis - n_baseline) %>%
  # Keep only EXCESS NEGATIVE movement (recoded to become positive)
  mutate(difference = if_else(difference < 0, abs(difference), 0)) %>%
  # Now summarize to just the DESTINATION prefecture (the end point of movement)
  group_by(date, end) %>%
  summarize(difference = sum(difference, na.rm = TRUE)) %>%
  select(date, end, difference) %>%
  # Export
  saveRDS("japan_between_prefecture_movement_decrease.rds")


# Calculate total ADDITIONAL people moving between neighborhoods in different prefectures TO THIS PREFECTURE,
# relative to how many people did before the crisis
read_rds("japan_temporal_edgelist.rds") %>% 
  # Identify only inter-neighborhood movement BETWEEN DIFFERENT prefectures
  filter(start != end) %>%
  # Now summarize to just the DESTINATION prefecture (the end point of movement)
  group_by(date, end) %>%
  summarize(n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  select(date, end, n_crisis) %>%
  # Export
  saveRDS("japan_between_prefecture_movement_crisis.rds")

```

## 1.4 Weekly Average Facebook User Mobility

```{r, message = FALSE, warning = FALSE}

# Difference in Crisis Movement Relative to Baseline
read_rds("japan_temporal_edgelist.rds") %>% 
  # Identify only inter-neighborhood movement WITHIN THE SAME prefecture
  filter(start == end) %>%
  group_by(date) %>%
  # How many MORE people are moving after the disaster?
  # How many FEWER people are moving after the disaster
  mutate(difference = n_crisis - n_baseline) %>%
  select(date, end, difference) %>%
  # Now calaculate average weekly movement
  group_by(pref = end, week = cut(date, breaks = "week", start.on.monday = TRUE)) %>%
  summarize(difference = mean(difference, na.rm = TRUE)) %>%
  # Export
  saveRDS("japan_same_prefecture_movement.rds")


```
